
clear all
set more off

** Change path to re-run on another computer
cd ""
**


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
* 1) Import fuel use by country and sector and fuel type
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

insheet using "IEA Energy Balances/updated2016_energy balances/IEA_WEB_QG_2_25072017132503498.csv", clear  

drop product flow unit v10 flagcodes flags  v8 location

rename (v4 v6 time value)  (product sector year ktoe)

*harmonize names
replace country="Korea, Republic of" if country=="Korea"
replace country="Slovakia" if country=="Slovak Republic"
replace country="United States of America" if country=="United States"
replace country="Venezuela (Bolivarian Republic of)" if country=="Venezuela"
replace country="China" if country=="People's Republic of China"
replace country="Taiwan, Republic of China" if country=="Chinese Taipei"
replace country="Vietnam" if country=="Viet Nam"
replace country="Cote d'Ivoire" if country=="C√¥te d'Ivoire" | cou=="Côte d'Ivoire"
replace country="Curaçao" if country=="Cura√ßao"
replace country="United Republic of Tanzania" if country=="Tanzania"
replace country="Republic of Moldova" if country=="Moldova"
replace country="Hong Kong, China" if country=="Hong Kong (China)"
replace country="Democratic People's Republic of Korea" if country=="Democratic People's Republic of Korea"
replace country="Democratic Republic of Congo" if country=="Democratic Republic of the Congo"

*drop 
drop if country=="Suriname" | country=="South Sudan" | country=="Niger" | country=="Mauritius" | country=="Curaçao"
compress


* droping country groups
drop if cou=="World" | cou=="OECD Americas" | cou=="OECD Asia Oceania" | cou=="OECD Europe" | cou=="Africa" | cou=="Non-OECD Americas" | cou=="Middle East" | cou=="Non-OECD Europe and Eurasia" | cou=="Asia (excluding China)" | cou=="China (P.R. of China and Hong Kong, China)" | cou=="World marine bunkers" | cou=="World aviation bunkers"  
drop if cou=="Former Soviet Union (If no detail)" | cou=="Former Yugoslavia (If no detail)" | cou=="Other Africa" | cou=="Other Non-OECD Americas" | cou=="Other Asia" | cou=="Memo: OECD Total" | cou=="Memo: Non-OECD Total" | cou=="Memo: IEA Total" | cou=="Memo: European Union-28" | cou=="Memo: FSU 15" | cou=="Memo: Former Yugoslavia" | cou=="Memo: OPEC" | cou=="Memo: G7" | cou=="Memo: G8" | cou=="Memo: G20"

*****
replace ktoe=. if ktoe==0
*****

unique cou
duplicates drop 
isid cou year prod sect


* ADD old data in from before 2010
*************************
preserve

insheet using "IEA Energy Balances/Energy balances world.csv", nodouble comma clear
drop if v4==""
drop v4
compress
rename * value#, renumber(1969)
rename (value1969 value1970 value1971) (country sector product)
drop in 1/2
replace country="Korea, Republic of" if country=="Korea"
replace country="Slovakia" if country=="Slovak Republic"
replace country="United States of America" if country=="United States"
replace country="Venezuela (Bolivarian Republic of)" if country=="Venezuela"
replace country="China" if country=="People's Republic of China"
replace country="Taiwan, Republic of China" if country=="Chinese Taipei"
destring value1972-value2012, replace force float
drop value2012
reshape long value, i(country sector product) j(year)
rename value ktoe
replace ktoe=. if ktoe==0
isid country year product sector
*check if not Australia TE pre 2015 in this data:
sum ktoe if cou=="Australia" & sec=="Transport equipment" & year>2009
//zero , correct then
replace ktoe=. if cou=="Kazakhstan" & sec=="Non-ferrous metals" & product== "Electricity" & year==2008
tempfile oldtoe
save `oldtoe'
restore

merge 1:1 country year product sector using `oldtoe',update // update only the missing values

tab year if _m==1
tab year if _m==2
tab year if _m==3
tab year if _m==4
tab year if _m==5
tab _m if year==2010|year==2011

*have some renamed products, so dont want to have any double counts when we subsum them later under broader categories 
//so dont keep those from  using if in year 2010 or 2011, unless we have an update of a missing value
//  _m==5 appear to be rounding errors.
drop if _m==2 & year==2010
drop if _m==2 & year==2011

drop _m


drop if country=="Netherlands Antilles"


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 2) aggregate by type 
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$


*need to rectangularize, so EF applied to all missing sectors (eg Transport equipment in Australia in some years)
isid country sector year product
fillin country sector year product
tab year if _f==1 // also one in 2010-2014, so TE in AUS
tab sector if _f==1 // all equally 414 but Transport equipment 418
tab cou if _f==1 // all equally 42 but Australia 46 
*tab type if _f==1 // only biofuels, electricity and nat gas.
drop _f

gen type=""

replace type="Coal" if product=="Anthracite" | product== "Coking coal" | product== "Other bituminous coal" | product== "Sub-bituminous coal" | product== "Lignite" | product== "Patent fuel" | product== "Coke oven coke" | product== "Gas coke" | product== "Coal tar" | product== "BKB/peat briquettes" | product== "BKB"  | product== "Peat products" | product== "Gas works gas" | product== "Coke oven gas" | product== "Blast furnace gas" | product== "Other recovered gases" | product== "Elec/heat output from non-specified manufactured gases" | product== "Peat" | product== "Brown coal (if no detail)" | product== "Hard coal (if no detail)"

replace type="Oil products" if product== "Refinery gas" | product== "Ethane" | product== "Liquefied petroleum gases (LPG)" | product== "Motor gasoline" | product== "Aviation gasoline" | product== "Gasoline type jet fuel" | product== "Kerosene type jet fuel" | product== "Other Kerosene" | product== "Other kerosene"  | product== "Gas/diesel oil" | product== "Fuel oil" | product== "Naphtha" | product== "White spirit & SBP" | product== "Lubricants" | product== "Bitumen" | product== "Paraffin waxes" | product== "Petroleum coke" | product== "Non-specified oil products" | product== "Crude oil" | product== "Natural gas liquids" | product== "Refinery feedstocks" | product== "Additives/blending components" | product== "Other hydrocarbons" | product== "Crude/NGL/feedstocks (if no detail)" |  product== "Motor gasoline excl. biofuels"  | product== "Kerosene type jet fuel excl. biofuels" | product== "Gas/diesel oil excl. biofuels"  | product== "Oil shale and oil sands" | product== "Other oil products"

replace type="Biofuels and waste" if product== "Industrial waste" | product== "Municipal waste (renewable)" | product== "Municipal waste (non-renewable)" | product== "Primary solid biofuels" | product== "Biogases" | product== "Biogasoline" | product== "Biodiesels" | product== "Other liquid biofuels" | product== "Non-specified primary biofuels and waste" | product== "Charcoal"

replace type="Natural gas" if product=="Natural Gas" | product=="Natural gas"

replace type="Electricity and heat" if product== "Nuclear" | product== "Hydro" | product== "Geothermal" | product== "Solar photovoltaic" | product== "Solar thermal" | product== "Tide, wave and ocean" | product== "Wind" | product== "Heat pumps" | product== "Electric boilers" | product== "Heat from chemical sources" | product== "Other sources" | product== "Electricity" | product== "Heat" | product== "Heat output from non-specified combustible fuels" | product== "Solar photovoltaics"

drop if product== "Total" | product== "Memo: Renewables"

tab product if type==""
assert type!=""

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 3A) Add emissions factor
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
//www.ipcc-nggip.iges.or.jp
// To provide continuity with previous editions of this publication and to fully account for fuel combustion emissions, the IEA CO2 emissions from fuel combus- tion include all emissions from fuel combustion, irre- spective of the category of reporting (Energy or IPPU) under the 2006 GLs.
rename product fuel
replace fuel="Natural gas" if fuel=="Natural Gas"
replace fuel="Other kerosene" if fuel=="Other Kerosene"

*replace so can match
replace fuel="Brown coal briquettes" if fuel=="BKB/peat briquettes"
replace fuel="Brown coal briquettes" if fuel=="BKB"
replace fuel="Other biogas" if fuel=="Biogases"
replace fuel="Coke oven coke and lignite coke" if fuel=="Coke oven coke"
replace fuel="Residual fuel oil" if fuel=="Fuel oil"
replace fuel="Gas/diesel oil" if fuel=="Gas/diesel oil excl. biofuels"
replace fuel="Industrial wastes" if fuel=="Industrial waste"
replace fuel="Jet kerosene" if fuel=="Kerosene type jet fuel"
replace fuel="Jet kerosene" if fuel=="Kerosene type jet fuel excl. biofuels"
replace fuel="Liquefied petroleum gases" if fuel=="Liquefied petroleum gases (LPG)"
replace fuel="Motor gasoline" if fuel=="Motor gasoline excl. biofuels"
replace fuel="Municipal wastes (non-biomass fraction)" if fuel=="Municipal waste (non-renewable)"
replace fuel="Municipal wastes (biomass fraction)" if fuel=="Municipal waste (renewable)"
replace fuel="Other petroleum products" if fuel=="Non-specified oil products"
replace fuel="Other primary solid biomass" if fuel=="Non-specified primary biofuels and waste"
replace fuel="Oil shale and tar sands" if fuel=="Oil shale and oil sands"
replace fuel="Other petroleum products" if fuel=="Other hydrocarbons"
replace fuel="Other petroleum products" if fuel=="Other oil products"
replace fuel="Oxygen steel furnace gas" if fuel=="Other recovered gases"
replace fuel="Peat" if fuel=="Peat products"
replace fuel="Other primary solid biomass" if fuel=="Primary solid biofuels"


merge m:1 fuel using  "IEAemission/IPCC_CO2_EF_manufacturing.dta"

tab fuel if _m==2 // ok. some we dont have/use see below
drop if _m==2
tab fuel if _m==1
tab fuel if _m==1 & type!="Electricity and heat" 
bysort fuel: egen notallmis=count(ktoe)
tab fuel if notallmis==0 // all missing

rename fuel product

tab product if _m==1 & type!="Electricity and heat"  & notallmis!=0


*take average for hard coal and brown coal over all categories
tabstat EF_manufacturing if type=="Coal" & ktoe!=.,by(sector)
*take average:
replace EF_manufacturing=94600 if product =="Brown coal (if no detail)"
replace EF_manufacturing=94600 if product =="Hard coal (if no detail)"

assert product=="Brown coal (if no detail)" | product=="Hard coal (if no detail)" if _m==1 & type!="Electricity and heat"  & notallmis!=0

tab product if EF_manufacturing==. & type!="Electricity and heat"  & notallmis!=0

drop notallmis

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 3B) add in the electricity co2 per kwh
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
preserve
insheet using "IEAemission/IEA_CO2_AD_16102017163311316.csv", clear  
keep country time value
tab time

*harmonize names:
replace country="Korea, Republic of" if country=="Korea"
replace country="Slovakia" if country=="Slovak Republic"
replace country="United States of America" if country=="United States"
replace country="Venezuela (Bolivarian Republic of)" if country=="Venezuela"
replace country="China" if country=="People's Republic of China"
replace country="Taiwan, Republic of China" if country=="Chinese Taipei"
replace country="Vietnam" if country=="Viet Nam"
replace country="Cote d'Ivoire" if country=="C√¥te d'Ivoire"  | cou=="Côte d'Ivoire" // not needed with stata 14
replace country="Curaçao" if country=="Cura√ßao" // not needed with stata 14
replace country="United Republic of Tanzania" if country=="Tanzania"
replace country="Republic of Moldova" if country=="Moldova"
replace country="Hong Kong, China" if country=="Hong Kong (China)" // not needed
replace country="Democratic Republic of Congo" if country=="Democratic Republic of the Congo" // not needed
replace country="Democratic People's Republic of Korea" if country=="Dem. People's Republic of Korea"
replace country="Former Yugoslav Republic of Macedonia" if country=="FYR of Macedonia"
tab cou

*drop 
drop if country=="Suriname" | country=="South Sudan" | country=="Niger" | country=="Mauritius" | country=="Curaçao"
rename time year
label var value "CO2 per kWh of electricity and heat (gCO2 per kWh)"
rename value EF_electricity
save "IEAemission/IEA_CO2",replace
restore

drop _m
merge m:1 country year using "IEAemission/IEA_CO2"
 
tab cou if _m==2
assert cou=="World" if _m==2
drop if _m==2 //world

tab cou if year==2010 & _m==1
tab year if _m==1
assert cou=="" if EF_electricity==. & year==2013
assert cou=="" if EF_electricity==. & year==2010
//from 2005 till 2013 have all observations except
// for EF_other have all non-missing anyways.
drop _m

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 3C) calculate emissions
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

*translate into ktoe:
// one TJ is 0.0238845 ktoe

replace EF_manufacturing=EF_manufacturing/0.0238845 // kg per ktoe
replace EF_manufacturing=EF_manufacturing/1000 // in ton co2 per ktoe

*electricity:
replace EF_electricity=EF_electricity/8.59845e-8 // g per ktoe
replace EF_electricity=EF_electricity/1000 // in kg per ktoe
replace EF_electricity=EF_electricity/1000 // in ton co2 per ktoe

**calculate aggregate co2 emissions per ktoe per category:
gen temp=EF_manufacturing*ktoe
bysort country sector year type: egen co2emissiontot=total(temp),missing
bysort country sector year type: egen ktoe_type=total(ktoe), missing
gen EF_manufacturing_tot=co2emissiontot/ktoe_type // this is the average EF by fuel aggregate

*for industry unido:
bysort country year type: egen co2emissiontot_unido=total(temp) if sector=="Textile and leather" | sector=="Iron and steel" | sector=="Chemical and petrochemical" | sector=="Non-ferrous metals" | sector=="Non-metallic minerals" | sector=="Transport equipment" | sector=="Machinery" | sector=="Food and tobacco" | sector=="Paper, pulp and print" | sector=="Wood and wood products" , missing
//use later below

drop product ktoe temp EF_manufacturing

duplicates drop

*checks
tab year if EF_manufacturing_tot!=.
tab year if EF_electricity!=.

replace EF_manufacturing_tot=EF_electricity if type=="Electricity and heat"
drop EF_electricity
rename EF_manufacturing_tot EF
label var EF "Emission factor ton CO2 per ktoe"

tabstat EF, by(type) stats(mean median)
tab cou if EF>8000 & type=="Electricity and heat" & EF!=.

isid cou year type sec

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
* 4) replace missings with i) linear interpolation  ii) constant extrapolation iii) cross country sector average if missing for all types
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
// EF also interpolated and extrapolated.

drop if year<1978

recast double ktoe_type

ipolate ktoe year, generate(ktoeipol) by(country sector type)

ipolate EF year, generate(EFipol) by(country sector type)

sort countr sec type year

bysort sector type year: egen cc_avg=mean(ktoe_type)

gen double ktoe_extra= ktoeipol
gen double EF_extra= EFipol

egen id=group(country sector type)

xtset id year

sort id year

replace  ktoe_extra=l.ktoe_extra if ktoe_extra==.
replace  EF_extra=l.EF_extra if EF_extra==.

forvalues i=1/50{
replace  ktoe_extra=f.ktoe_extra if ktoe_extra==.
replace  EF_extra=f.EF_extra if EF_extra==.

}

misstable sum ktoe_type ktoeipol ktoe_extra if type!="Biofuels and waste" & year>1994

drop id
compress

tab year
tab sector
tab country
// all complete and equal


*create industry for unido: (MANUFACTURING)

expand 2 in l
replace sec="Industry Unido" in l
foreach x in cc_avg ktoe_extra ktoe_type ktoeipol EF EFipol EF_extra{
replace `x'=. in l
}
fillin country sector year type
assert sec=="Industry Unido" if _fillin==1
tab year if _fillin==1
tab cou if _fillin==1
tab type if _fillin==1

*only create industry unido if all components are there, possibly polated, rather than polating afterwards
bysort cou year type: egen ktoe_extra_indun=total(ktoe_extra)  if sector=="Textile and leather" | sector=="Iron and steel" | sector=="Chemical and petrochemical" | sector=="Non-ferrous metals" | sector=="Non-metallic minerals" | sector=="Transport equipment" | sector=="Machinery" | sector=="Food and tobacco" | sector=="Paper, pulp and print" | sector=="Wood and wood products" , missing
bysort cou year type: egen check=count(ktoe_extra) if sector=="Textile and leather" | sector=="Iron and steel" | sector=="Chemical and petrochemical" | sector=="Non-ferrous metals" | sector=="Non-metallic minerals" | sector=="Transport equipment" | sector=="Machinery" | sector=="Food and tobacco" | sector=="Paper, pulp and print" | sector=="Wood and wood products" 
tab check
replace ktoe_extra_indun=. if check!=10
bysort cou year type: egen ktoe_extra_indun2=max(ktoe_extra_indun) 
sum ktoe_extra_indun2 // ok: 0.5*60420+60420 obs
replace ktoe_extra=ktoe_extra_indun2 if sector=="Industry Unido"

*for EF, based on ktoe extra, not on subtype fuels,  but is equivalent, linear operator
g temp=EF_extra*ktoe_extra/ktoe_extra_indun2  if sector=="Textile and leather" | sector=="Iron and steel" | sector=="Chemical and petrochemical" | sector=="Non-ferrous metals" | sector=="Non-metallic minerals" | sector=="Transport equipment" | sector=="Machinery" | sector=="Food and tobacco" | sector=="Paper, pulp and print" | sector=="Wood and wood products" 
// so only where all availabe
bysort cou year type: egen check2=count(temp) if sector=="Textile and leather" | sector=="Iron and steel" | sector=="Chemical and petrochemical" | sector=="Non-ferrous metals" | sector=="Non-metallic minerals" | sector=="Transport equipment" | sector=="Machinery" | sector=="Food and tobacco" | sector=="Paper, pulp and print" | sector=="Wood and wood products" 
tab check2
replace temp=. if check2!=10
bysort cou year type: egen temp2=total(temp), missing
replace EF_extra=temp2 if sector=="Industry Unido"

drop _fillin ktoe_extra_indun check ktoe_extra_indun2  co2emissiontot_unido temp temp2 check check2 co2emissiontot

assert EF_extra!=. if ktoe_extra!=.

sort cou year sec type

***Create flag for ktoe, on a cou year sec basis
g flag_ktoe=0 if ktoe_type!=.
replace flag_ktoe=1 if ktoeipol!=. & ktoe_type==.
replace flag_ktoe=2 if ktoe_extra!=. & ktoeipol==.
bysort cou year sec: egen flag_ktoe2=max(flag_ktoe)
drop flag_ktoe
rename flag_ktoe2 flag_ktoe
label var flag_ktoe "Energy use oberved (0), interpolated (1), extrapolated (2)"
// only 0 if all obs, if one observations was 1 or 2 then highest (so 2) is used.

save "IPOL Fuel type use per sector_agregated", replace
